These were cell suspensions of meningeal dura, enriched by MACS for cells expressing Cd45, Cd31, and Lyve-1. We sent libraries from ~5000 cells per sample to be sequenced at least 50,000 reads per cell. We might have ended up with more though, because we sequenced in the NovaSeq S2. We are expecting to see a lot of leukocytes and vascular cells. The purpose is to check the effect of sex, Apoe isoform expression, and age (and respective interactions) on the transcriptomes of the different cell subpopulations.
The meningeal layer of the dura mater is a durable, dense fibrous membrane that passes through the foramen magnum and is continuous with the dura mater of the spinal cord. The meningeal layer of the dura mater creates several dural folds that divide the cranial cavity into freely communicating spaces.
Flow markers:
- CD45/Ptprc/B220/Ly-5/Lyt-4/T200: transmembrane protein tyrosine phosphatase, located on most haematopoietic cells, positive selection of leukocytes
- CD31/Pecam1: adhesion and signaling receptor that is expressed on endothelial and hematopoietic cells, positive selection of endothelial cells
- Lyve1/1200012G08Rik/Lyve-1/Xlkd1/lymphatic vessel endothelial HA receptor-1: found primarily on lymphatic endothelial cells
knitr::opts_knit$set(
root.dir = "/research/labs/neurology/fryer/m214960/Da_Mesquita/scripts/R")
# load packages
library(ComplexUpset) # upset()
library(dplyr) # ungroup()
library(ggrepel) # geom_text_repel()
library(ggtree) # BuildClusterTree()
library(gtools) # smartbind()
library(parallel) # detectCores()
library(plotly) # plot_ly()
library(UpSetR) # fromList()
library(Seurat) # Idents()
library(ShinyCell) # createConfig()
#library(slingshot) # slingshot()
#library(tradeSeq)
library(UpSetR) # fromList()
# work in parallel
options(mc.cores = detectCores() - 1)
saveToPDF <- function(...) {
d = dev.copy(pdf,...)
dev.off(d)
}
saveToPNG <- function(...) {
d = dev.copy(png,...)
dev.off(d)
}
# variables
sample_order <- c("E3.2M.M","E3.2M.F","E4.2M.M","E4.2M.F",
"E3.14M.M","E3.14M.F","E4.14M.M","E4.14M.F")
age_order <- c("2 months","14 months")
sex_order <- c("Male","Female")
isoform_order <- c("E3","E4")
sample_colors <- c("gray","red","orange","yellow","green","blue","purple","pink")
age_colors <- c("darkgray","chartreuse3")
sex_colors <- c("darkgray","purple")
isoform_colors <- c("darkgray","cornflowerblue")
# set seed
set.seed(8)
# work in parallel
options(mc.cores = detectCores() - 1)
mouse.unannotated <- readRDS("../../rObjects/mouse_unannotated.rds")
Idents(mouse.unannotated) <- "seurat_clusters"
DefaultAssay(mouse.unannotated) <- "RNA"
mouse.unannotated <- NormalizeData(mouse.unannotated)
mouse.unannotated
## An object of class Seurat
## 39470 features across 35149 samples within 2 assays
## Active assay: RNA (20251 features, 0 variable features)
## 1 other assay present: SCT
## 3 dimensional reductions calculated: pca, harmony, umap
# UMAP with clusters
cluster_colors <- c("black","gray40","gray","red1","blue","magenta1","darkorange2",
"darkorange4","yellow1","yellow4","yellow2","green","lightgreen",
"chartreuse1","forestgreen","cyan","SteelBlue","red4","Aquamarine",
"purple1","purple4","orange","plum1","burlywood2","lightsalmon",
"sandybrown")
DimPlot(object = mouse.unannotated,
reduction = "umap",
repel = TRUE,
cols = cluster_colors,
label = TRUE)
# UMAP by isoform
DimPlot(object = mouse.unannotated,
reduction = "umap",
repel = TRUE,
split.by = "isoform",
cols = cluster_colors)
# UMAP by age
DimPlot(object = mouse.unannotated,
reduction = "umap",
repel = TRUE,
split.by = "age",
cols = cluster_colors)
# UMAP by sex
DimPlot(object = mouse.unannotated,
reduction = "umap",
repel = TRUE,
split.by = "sex",
cols = cluster_colors)
# UMAP by cell cycle phase
DimPlot(object = mouse.unannotated,
reduction = "umap",
repel = TRUE,
split.by = "Phase",
cols = cluster_colors)
# UMAP percent.mt
FeaturePlot(mouse.unannotated,
reduction = "umap",
features = "percent.mt") +
scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))
# UMAP percent.ribo
FeaturePlot(mouse.unannotated,
reduction = "umap",
features = "percent.ribo.protein") +
scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))
# UMAP percent.hb
FeaturePlot(mouse.unannotated,
reduction = "umap",
features = "percent.hb") +
scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))
# UMAP nCount
FeaturePlot(mouse.unannotated,
reduction = "umap",
features = "nCount_RNA") +
scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))
# UMAP nFeature
FeaturePlot(mouse.unannotated,
reduction = "umap",
features = "nFeature_RNA") +
scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))
# UMAP cell.complexity
FeaturePlot(mouse.unannotated,
reduction = "umap",
features = "cell.complexity") +
scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))
# UMAP Ttr expression
FeaturePlot(mouse.unannotated,
reduction = "umap",
features = "Ttr") +
scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))
# Cells per sample per cluster
sample_ncells <- FetchData(mouse.unannotated,
vars = c("ident", "sample")) %>%
dplyr::count(ident,sample) %>%
tidyr::spread(ident, n)
sample_ncells
## sample 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
## 1 E3.2M.M 300 612 370 249 366 239 201 140 220 247 98 149 110 111 58 124 143
## 2 E3.2M.F 458 693 160 296 260 261 249 180 170 298 182 220 132 142 100 121 115
## 3 E4.2M.M 341 277 289 163 197 166 205 290 135 137 103 81 103 123 77 43 50
## 4 E4.2M.F 406 379 273 269 313 269 296 157 124 119 144 73 136 122 101 70 81
## 5 E3.14M.M 490 335 295 312 275 249 274 327 282 206 216 113 184 199 96 85 93
## 6 E3.14M.F 233 368 300 363 335 294 157 355 173 183 152 154 62 11 77 105 96
## 7 E4.14M.M 433 233 369 278 256 250 281 329 310 189 167 92 120 144 94 114 90
## 8 E4.14M.F 691 447 587 527 431 562 355 121 456 294 300 200 216 118 362 192 181
## 17 18 19 20 21 22 23 24
## 1 177 41 56 60 43 31 26 25
## 2 112 75 75 85 114 102 18 14
## 3 87 54 59 40 48 86 25 22
## 4 118 61 53 33 31 58 37 31
## 5 97 113 97 66 88 53 45 29
## 6 54 103 64 58 28 11 13 13
## 7 52 126 81 72 70 61 50 28
## 8 62 159 85 128 85 50 52 35
# Cells per isoform per cluster
isoform_ncells <- FetchData(mouse.unannotated,
vars = c("ident", "isoform")) %>%
dplyr::count(ident,isoform) %>%
tidyr::spread(ident, n)
isoform_ncells
## isoform 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
## 1 E3 1481 2008 1125 1220 1236 1043 881 1002 845 934 648 636 488 463 331
## 2 E4 1871 1336 1518 1237 1197 1247 1137 897 1025 739 714 446 575 507 634
## 15 16 17 18 19 20 21 22 23 24
## 1 435 447 440 332 292 269 273 197 102 81
## 2 419 402 319 400 278 273 234 255 164 116
# Cells per sex per cluster
sex_ncells <- FetchData(mouse.unannotated,
vars = c("ident", "sex")) %>%
dplyr::count(ident,sex) %>%
tidyr::spread(ident, n)
sex_ncells
## sex 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
## 1 Male 1564 1457 1323 1002 1094 904 961 1086 947 779 584 435 517 577 325
## 2 Female 1788 1887 1320 1455 1339 1386 1057 813 923 894 778 647 546 393 640
## 15 16 17 18 19 20 21 22 23 24
## 1 366 376 413 334 293 238 249 231 146 104
## 2 488 473 346 398 277 304 258 221 120 93
# Cells per age per cluster
age_ncells <- FetchData(mouse.unannotated,
vars = c("ident", "age")) %>%
dplyr::count(ident,age) %>%
tidyr::spread(ident, n)
age_ncells
## age 0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 1 2 months 1505 1961 1092 977 1136 935 951 767 649 801 527 523 481 498
## 2 14 months 1847 1383 1551 1480 1297 1355 1067 1132 1221 872 835 559 582 472
## 14 15 16 17 18 19 20 21 22 23 24
## 1 336 358 389 494 231 243 218 236 277 106 92
## 2 629 496 460 265 501 327 324 271 175 160 105
# User params
goi <- "Malat1"
threshold <- 1
# Subset data
log2.threshold <- log2(threshold + 0.01)
counts.df <- FetchData(mouse.unannotated, vars = goi)
colnames(counts.df) <- "counts"
log2.counts.df <- log2(counts.df + 0.01)
# Histogram
title <- paste0(goi, "\nnCount_RNA > ", threshold)
hist1 <- ggplot(counts.df, aes(x = counts)) +
geom_histogram(bins = 100, fill = "gray", color = "black") +
labs(title = title, x=NULL, y=NULL) +
xlab(paste0(goi, " nCount_RNA")) + ylab("# of Samples") + theme_bw() +
geom_vline(xintercept = threshold, col = "blue") +
annotate("rect",
xmin = -Inf,
xmax = threshold,
ymin = 0,
ymax=Inf,
alpha=0.2,
fill="chocolate4") +
annotate("rect",
xmin = threshold,
xmax = Inf,
ymin = 0,
ymax=Inf,
alpha=0.2,
fill="deepskyblue")
# Histogram log transformed
hist2 <- ggplot(log2.counts.df, aes(x = counts)) +
geom_histogram(bins = 100, fill = "gray", color = "black") +
labs(title = title, x=NULL, y=NULL) +
xlab(paste0("Log2(",goi, " nCount_RNA)")) + ylab("# of Samples") + theme_bw() +
geom_vline(xintercept = log2.threshold, col = "blue") +
annotate("rect",
xmin = -Inf,
xmax = log2.threshold,
ymin = 0,
ymax=Inf,
alpha=0.2,
fill="chocolate4") +
annotate("rect",
xmin = log2.threshold,
xmax = Inf,
ymin = 0,
ymax=Inf,
alpha=0.2,
fill="deepskyblue")
# plot
plots1 <- list(hist1,hist2)
layout1 <- rbind(c(1),c(2))
grid1 <- grid.arrange(grobs = plots1, layout_matrix = layout1)
# user define variable
goi <- "Malat1"
# Extract counts data
DefaultAssay(mouse.unannotated) <- "RNA"
Idents(mouse.unannotated) <- "SCT_snn_res.0.5"
geneData <- FetchData(mouse.unannotated,
vars = goi,
slot = "counts")
geneData <- geneData > 1
table(geneData)
## geneData
## FALSE TRUE
## 3645 31504
mouse.unannotated$Expression <- geneData
FetchData(mouse.unannotated,
vars = c("ident", "Expression")) %>%
dplyr::count(ident, Expression) %>%
tidyr::spread(ident, n)
## Expression 0 1 2 3 4 5 6 7 8 9 10 11 12
## 1 FALSE 36 29 1569 172 61 818 469 9 1 6 56 3 116
## 2 TRUE 3316 3315 1074 2285 2372 1472 1549 1890 1869 1667 1306 1079 947
## 13 14 15 16 17 18 19 20 21 22 23 24
## 1 NA 200 NA 2 1 55 4 24 10 3 NA 1
## 2 970 765 854 847 758 677 566 518 497 449 266 196
# Plot
mouse.unannotated@meta.data %>%
group_by(SCT_snn_res.0.5, Expression) %>%
dplyr::count() %>%
group_by(SCT_snn_res.0.5) %>%
dplyr::mutate(percent = 100*n/sum(n)) %>%
ungroup() %>%
ggplot(aes(x=SCT_snn_res.0.5,y=percent, fill=Expression)) +
geom_col() +
ggtitle(paste0("Percentage of cells with > 1 counts for ", goi)) +
theme(axis.text.x = element_text(angle = 90))
mouse.unannotated <- BuildClusterTree(object = mouse.unannotated,
dims = 1:21, # min.pc in processing script
reorder = FALSE,
reorder.numeric = FALSE)
tree <- mouse.unannotated@tools$BuildClusterTree
tree$tip.label <- paste0(tree$tip.label)
ggtree::ggtree(tree, aes(x, y)) +
scale_y_reverse() +
ggtree::geom_tree() +
ggtree::theme_tree() +
ggtree::geom_tiplab(offset = 1) +
ggtree::geom_tippoint(color = cluster_colors[1:length(tree$tip.label)], shape = 16, size = 5) +
coord_cartesian(clip = 'off') +
theme(plot.margin = unit(c(0,2.5,0,0), 'cm'))
# Printing out the most variable genes driving PCs
print(x = mouse.unannotated[["harmony"]],
dims = 1:10,
nfeatures = 10)
## harmony_ 1
## Positive: Lyz2, Cd74, C1qa, C1qb, Mrc1, Apoe, H2-Aa, H2-Ab1, C1qc, H2-Eb1
## Negative: Ptprb, Flt1, Mgp, Plpp3, Igfbp7, Kdr, Sparc, Col4a1, Plvap, Podxl
## harmony_ 2
## Positive: Col1a2, Igfbp5, Col1a1, Mgp, Col12a1, Slc38a2, Cdh11, Ogn, Ecrg4, Gas1
## Negative: Flt1, Ptprb, Kdr, Plvap, Podxl, Igfbp3, Pecam1, Egfl7, Emcn, Ly6c1
## harmony_ 3
## Positive: C1qa, Apoe, Mrc1, C1qb, C1qc, Pf4, Selenop, Stab1, Ctsb, Dab2
## Negative: S100a9, S100a8, Cd52, S100a6, Hp, Hmgb2, Lcn2, Pglyrp1, S100a11, Retnlg
## harmony_ 4
## Positive: S100a9, S100a8, Lyz2, Mmp9, Hp, Lcn2, Retnlg, Pglyrp1, Mmp8, Wfdc21
## Negative: Cd79a, Cd74, Cd79b, Ly6d, Ms4a1, Ptprcap, Vpreb3, Siglecg, Pax5, Spib
## harmony_ 5
## Positive: Cd74, H2-Ab1, H2-Aa, H2-Eb1, Ccr2, Mpeg1, Cd52, S100a4, Ctss, Crip1
## Negative: Stab1, Mrc1, Cpa3, Cma1, Tpsb2, Mcpt4, Il1rl1, Serpinb1a, Mrgprb1, Cd163
## harmony_ 6
## Positive: Cd79a, Cd79b, Ly6d, Ms4a1, Vpreb3, Pax5, Plpp3, Cd24a, Spib, Siglecg
## Negative: Crip1, Myh11, Rgs5, Acta2, Myl9, Tagln, Ccr2, Notch3, S100a4, Vim
## harmony_ 7
## Positive: Il1rl1, Kit, Cpa3, Tpsb2, Cma1, Mcpt4, Mrgprb1, Slc18a2, Ms4a2, Cd200r3
## Negative: Myl9, Rgs5, Myh11, Notch3, Acta2, Tagln, Tpm2, Cd79a, Ebf1, Igfbp7
## harmony_ 8
## Positive: Ms4a1, Ly6d, Ltb, Cd37, Cd52, Ikzf3, Ptprc, Cd79a, Cd79b, Cd74
## Negative: Mki67, Top2a, Stmn1, Hist1h1b, Knl1, Kif11, H2afz, Hmgb2, Pclaf, Prc1
## harmony_ 9
## Positive: Igfbp3, Igfbp7, Cpa3, Tpsb2, Cma1, Mcpt4, Col4a1, Mrgprb1, Kit, Serpinb1a
## Negative: Vwf, Cfh, Slco1a4, Cldn5, Clec14a, Slco2a1, Adgrg6, Mal, Ptn, Lrg1
## harmony_ 10
## Positive: Vwf, Cd74, H2-Ab1, H2-Aa, H2-Eb1, Cfh, Cd79a, Slco1a4, Ly6d, Cldn5
## Negative: Ms4a4b, Gimap3, Il7r, Trbc2, Skap1, Cd3g, Bcl11b, Cd3e, Ccl5, Trbc1
# print top variable genes
DefaultAssay(mouse.unannotated) <- "SCT"
VariableFeatures(mouse.unannotated)[1:100]
## [1] "S100a9" "S100a8" "Igfbp5" "Mgp" "Cd74" "Ngp"
## [7] "Retnlg" "Rgs5" "Ltf" "Lcn2" "Cpa3" "Camp"
## [13] "Tpsb2" "Cma1" "Mcpt4" "Igfbp3" "Mmp8" "Col1a2"
## [19] "H2-Ab1" "Igf2" "Wfdc21" "Dcn" "Ccl8" "Ly6d"
## [25] "Chil3" "H2-Eb1" "Lyz2" "Vpreb3" "Ms4a1" "H2-Aa"
## [31] "Vwf" "Cd79a" "Ccl5" "Myh11" "Acta2" "Col1a1"
## [37] "Mki67" "Mrgprb1" "Fabp4" "Tagln" "Gpx3" "Col12a1"
## [43] "H19" "Kit" "Lum" "Kcna1" "Ogn" "Cxcl12"
## [49] "Cldn5" "Apod" "Flt1" "Mrc1" "Top2a" "Hist1h1b"
## [55] "Slc38a2" "Apoe" "Cdh11" "S100a6" "Hmgb2" "Ecrg4"
## [61] "Tpm2" "S100a4" "Ly6c2" "Ptprb" "Mpz" "Comp"
## [67] "C1qa" "Plac8" "Pf4" "Cd79b" "Cxcl9" "Fn1"
## [73] "Ifitm6" "Plp1" "Col3a1" "Il1rl1" "Slpi" "Ccl12"
## [79] "Myl9" "Ptgds" "Kdr" "Hist1h3c" "Ccr2" "Pglyrp1"
## [85] "Mfap4" "Hist1h2ae" "Mmp9" "Fmod" "Cd209f" "Il1b"
## [91] "C1qb" "Mbp" "Vtn" "Slco1a4" "Snhg11" "Scn7a"
## [97] "Mmrn1" "Penk" "Gzma" "Gas1"
DefaultAssay(mouse.unannotated) <- "RNA"
VlnPlot(mouse.unannotated,
features = "Ptprc",
cols = cluster_colors,
split.by = "SCT_snn_res.0.5")
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##
## This message will be shown once per session.
VlnPlot(mouse.unannotated,
features = "Lyve1",
cols = cluster_colors,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = "Pecam1",
cols = cluster_colors,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = "Cd19",
cols = cluster_colors,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = "Fcrla",
cols = cluster_colors,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = "Cd79a",
cols = cluster_colors,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = "Cd79b",
cols = cluster_colors,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = "Ms4a1",
cols = cluster_colors,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = "Mrc1",
cols = cluster_colors,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = "Ms4a7",
cols = cluster_colors,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = "Pf4",
cols = cluster_colors,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = "Lyve1",
cols = cluster_colors,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = "Trac",
cols = cluster_colors,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = "Cd3d",
cols = cluster_colors,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = "Cd3e",
cols = cluster_colors,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = "Cd3g",
cols = cluster_colors,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = "Cd8a",
cols = cluster_colors,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = "Cd4",
cols = cluster_colors,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = "Pecam1",
cols = cluster_colors,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = "Cdh5",
cols = cluster_colors,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = "Kdr",
cols = cluster_colors,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = "Flt1",
cols = cluster_colors,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = "Plvap",
cols = cluster_colors,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = "Col1a1",
cols = cluster_colors,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = "Col1a2",
cols = cluster_colors,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = "Dcn",
cols = cluster_colors,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = "Ogn",
cols = cluster_colors,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = "Gas1",
cols = cluster_colors,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = "Fcer1a",
cols = cluster_colors,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = "Kit", # aka Cd117
cols = cluster_colors,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = "Itgam", # aka Cd11b
cols = cluster_colors,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = "Cd14",
cols = cluster_colors,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = "Cd68",
cols = cluster_colors,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = "Ccr5",
cols = cluster_colors,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = "Ly6g",
cols = cluster_colors,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = "Itgam", # aka Cd11b
cols = cluster_colors)
VlnPlot(mouse.unannotated,
features = "Cxcr4",
cols = cluster_colors,
split.by = "SCT_snn_res.0.5")
Idents(mouse.unannotated) <- "SCT_snn_res.0.5"
goi1 <- c("Cd3e","Trbc1","Cd4","Cd8a","Foxp3","Tbx21","Gata3","Thy1",
"Cd19","Ms4a1","Cd27","Ighg1","Ptprc","Ly6g","Itgam","Aif1","Klrb1")
goi2 <- c("Adgre1","Ms4a3","Ly6c2","Mrc1","Lyz2","Cd74","Cd83","Cd14","H2-Aa",
"H2-Ab1","Sirpa","Xcr1","Siglech","Itgax","Kit","Mcpt4")
goi3 <- c("Pdgfra","Col1a1","Lum","Pdgfrb","Rgs5","Cspg4","Acta2","Tagln",
"Pecam1","Cd34","Plvap","Stmn2","Slc38a5","Vwf","Mfsd2a","Cldn5")
goi4 <- c("Prox1","Flt4","Pdpn","Lyve1","Sox10","Mbp","Fgf13","Kcnab2","Tubb3",
"Slc17a6","Shank2","Erbb4","Park7","Kif5b","Slc4a1","Hmbs")
goi5 <- c("Pecam1","Flt4","Itgam","Mrc1","Cd3e","Gata3","Cd19","Ly6g","Pdgfrb",
"Kit","Col1a1","Pmp22","Hmbs")
v1 <- VlnPlot(mouse.unannotated,
features = goi1,
cols = cluster_colors,
split.by = "SCT_snn_res.0.5",
flip = TRUE,
stack = TRUE)
## Warning in FetchData.Seurat(object = object, vars = features, slot = slot): The
## following requested variables were not found: Ighg1
v1
v2 <- VlnPlot(mouse.unannotated,
features = goi2,
cols = cluster_colors,
split.by = "SCT_snn_res.0.5",
flip = TRUE,
stack = TRUE)
v2
v3 <- VlnPlot(mouse.unannotated,
features = goi3,
cols = cluster_colors,
split.by = "SCT_snn_res.0.5",
flip = TRUE,
stack = TRUE)
v3
v4 <- VlnPlot(mouse.unannotated,
features = goi4,
cols = cluster_colors,
split.by = "SCT_snn_res.0.5",
flip = TRUE,
stack = TRUE)
v4
v5 <- VlnPlot(mouse.unannotated,
features = goi5,
cols = cluster_colors,
split.by = "SCT_snn_res.0.5",
flip = TRUE,
stack = TRUE)
v5
# work in parallel
options(mc.cores = detectCores() - 1)
# Find markers for each cluster
# Compares single cluster vs all other clusters
# Default is logfc.threshold = 0.25, min.pct = 0.5
Idents(mouse.unannotated) <- "SCT_snn_res.0.5"
all.markers <- FindAllMarkers(object = mouse.unannotated,
assay = "RNA",
test.use = "MAST",
verbose = TRUE)
# add column
all.markers$delta_pct <- abs(all.markers$pct.1 - all.markers$pct.2)
# rename columns and rows
rownames(all.markers) <- 1:nrow(all.markers)
all.markers <- all.markers[,c(6,7,1,5,2:4,8)]
colnames(all.markers)[c(6,7)] <- c("pct_1","pct_2")
# save
saveRDS(all.markers, "../../rObjects/mouse_all_markers.rds")
# more stringent filtering
all.markers <- all.markers[all.markers$p_val_adj < 0.01,]
# compare
table(all.markers$cluster)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
## 2786 2260 2717 3378 2038 2667 2427 4312 2027 2350 2609 2737 2780 3817 2750 4084
## 16 17 18 19 20 21 22 23 24
## 3189 2783 2637 2717 2743 3922 4065 1983 915
# subset
cluster0 <- all.markers[all.markers$cluster == 0,]
cluster1 <- all.markers[all.markers$cluster == 1,]
cluster2 <- all.markers[all.markers$cluster == 2,]
cluster3 <- all.markers[all.markers$cluster == 3,]
cluster4 <- all.markers[all.markers$cluster == 4,]
cluster5 <- all.markers[all.markers$cluster == 5,]
cluster6 <- all.markers[all.markers$cluster == 6,]
cluster7 <- all.markers[all.markers$cluster == 7,]
cluster8 <- all.markers[all.markers$cluster == 8,]
cluster9 <- all.markers[all.markers$cluster == 9,]
cluster10 <- all.markers[all.markers$cluster == 10,]
cluster11 <- all.markers[all.markers$cluster == 11,]
cluster12 <- all.markers[all.markers$cluster == 12,]
cluster13 <- all.markers[all.markers$cluster == 13,]
cluster14 <- all.markers[all.markers$cluster == 14,]
cluster15 <- all.markers[all.markers$cluster == 15,]
cluster16 <- all.markers[all.markers$cluster == 16,]
cluster17 <- all.markers[all.markers$cluster == 17,]
cluster18 <- all.markers[all.markers$cluster == 18,]
cluster19 <- all.markers[all.markers$cluster == 19,]
cluster20 <- all.markers[all.markers$cluster == 20,]
cluster21 <- all.markers[all.markers$cluster == 21,]
cluster22 <- all.markers[all.markers$cluster == 22,]
cluster23 <- all.markers[all.markers$cluster == 23,]
cluster24 <- all.markers[all.markers$cluster == 24,]
# save
write.table(all.markers,
"../../results/allClusters/markers/putative_cluster_markers_pvaladj_0.01.tsv",
sep = "\t", quote = FALSE, row.names = FALSE)
# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
features = cluster0$gene[1:20],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = cluster0$gene[21:40],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
features = cluster1$gene[1:20],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = cluster1$gene[21:40],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
features = cluster2$gene[1:20],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = cluster2$gene[21:40],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
features = cluster3$gene[1:20],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = cluster3$gene[21:40],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
features = cluster4$gene[1:20],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = cluster4$gene[21:40],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
features = cluster6$gene[1:20],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = cluster6$gene[21:40],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
features = cluster7$gene[1:20],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = cluster7$gene[21:40],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
features = cluster8$gene[1:20],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = cluster8$gene[21:40],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
features = cluster9$gene[1:20],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = cluster9$gene[21:40],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
features = cluster10$gene[1:20],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = cluster10$gene[21:40],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
features = cluster11$gene[1:20],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = cluster11$gene[21:40],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
features = cluster12$gene[1:20],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = cluster12$gene[21:40],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
features = cluster13$gene[1:20],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = cluster13$gene[21:40],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
features = cluster14$gene[1:20],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = cluster14$gene[21:40],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
features = cluster15$gene[1:20],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = cluster15$gene[21:40],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
features = cluster16$gene[1:20],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = cluster16$gene[21:40],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
features = cluster17$gene[1:20],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = cluster17$gene[21:40],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
features = cluster18$gene[1:20],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = cluster18$gene[21:40],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = "Siglecf",
cols = cluster_colors,
split.by = "SCT_snn_res.0.5")
# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
features = cluster19$gene[1:20],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = cluster19$gene[21:40],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
features = cluster20$gene[1:20],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = cluster20$gene[21:40],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
features = cluster21$gene[1:20],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = cluster21$gene[21:40],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
features = cluster22$gene[1:20],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = cluster22$gene[21:40],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
features = cluster23$gene[1:20],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = cluster23$gene[21:40],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
features = cluster24$gene[1:20],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
VlnPlot(mouse.unannotated,
features = cluster24$gene[21:40],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "SCT_snn_res.0.5")
# Rename all identities
mouse.annotated <- RenameIdents(object = mouse.unannotated,
"0" = "Kdr-high BECs & LECs 1",
"1" = "Mrc1+H2-Eb1-low macrophages 1",
"2" = "Mrc1+H2-Eb1-low macrophages 2",
"3" = "Fibroblasts 1",
"4" = "Mrc1+H2-Eb1-high macrophages 1",
"5" = "Fibroblasts 2",
"6" = "Kdr-high BECs & LECs 2",
"7" = "Neutrophils",
"8" = "T cells & ILCs 1",
"9" = "Dendritic cells",
"10" = "Vwf+ BECs & LECs",
"11" = "Mrc1+H2-Eb1-high macrophages 2",
"12" = "Pericytes & SMCs",
"13" = "Mature B cells 1",
"14" = "Schwann cells",
"15" = "T cells & ILCs 2",
"16" = "Mrc1+H2-Eb1-low macrophages 3",
"17" = "Mast cells",
"18" = "Vwf+Cldn5+ BECs",
"19" = "Monocytes",
"20" = "Mki67+ macrophages",
"21" = "Mature B cells 2",
"22" = "Developing B cells 1",
"23" = "Developing B cells 2",
"24" = "Doublets BECs / macrophages"
)
mouse.annotated$seurat_clusters <- Idents(mouse.annotated)
mouse.annotated$individual_clusters <- Idents(mouse.annotated)
# reset levels
mouse.annotated$individual_clusters <-
factor(mouse.annotated$individual_clusters,
levels = c("Pericytes & SMCs",
"Vwf+Cldn5+ BECs",
"Vwf+ BECs & LECs",
"Kdr-high BECs & LECs 1", "Kdr-high BECs & LECs 2",
"Doublets BECs / macrophages",
"Mrc1+H2-Eb1-low macrophages 1", "Mrc1+H2-Eb1-low macrophages 2", "Mrc1+H2-Eb1-low macrophages 3",
"Mrc1+H2-Eb1-high macrophages 1", "Mrc1+H2-Eb1-high macrophages 2",
"Dendritic cells",
"Monocytes",
"Neutrophils",
"Mast cells",
"Mki67+ macrophages",
"Fibroblasts 1", "Fibroblasts 2",
"Schwann cells",
"T cells & ILCs 1", "T cells & ILCs 2",
"Developing B cells 1", "Developing B cells 2",
"Mature B cells 1", "Mature B cells 2"))
# umap
umap <- DimPlot(object = mouse.annotated,
group.by = "individual_clusters",
reduction = "umap",
cols = cluster_colors) +
theme(legend.position = "bottom")
umap
# Rename all identities
mouse.merged <- RenameIdents(object = mouse.unannotated,
"0" = "Kdr-high BECs & LECs",
"1" = "Mrc1+H2-Eb1-low macrophages",
"2" = "Mrc1+H2-Eb1-low macrophages",
"3" = "Fibroblasts",
"4" = "Mrc1+H2-Eb1-high macrophages",
"5" = "Fibroblasts",
"6" = "Kdr-high BECs & LECs",
"7" = "Neutrophils",
"8" = "T cells & ILCs",
"9" = "Dendritic cells",
"10" = "Vwf+ BECs & LECs",
"11" = "Mrc1+H2-Eb1-high macrophages",
"12" = "Pericytes & SMCs",
"13" = "Mature B cells",
"14" = "Schwann cells",
"15" = "T cells & ILCs",
"16" = "Mrc1+H2-Eb1-low macrophages",
"17" = "Mast cells",
"18" = "Vwf+Cldn5+ BECs",
"19" = "Monocytes",
"20" = "Mki67+ macrophages",
"21" = "Mature B cells",
"22" = "Developing B cells",
"23" = "Developing B cells",
"24" = "Doublets BECs / macrophages"
)
# save idents and set levels
mouse.annotated$merged_clusters <- Idents(mouse.merged)
mouse.annotated$merged_clusters <-
factor(mouse.annotated$merged_clusters,
levels = c("Pericytes & SMCs",
"Vwf+Cldn5+ BECs",
"Vwf+ BECs & LECs",
"Kdr-high BECs & LECs",
"Doublets BECs / macrophages",
"Mrc1+H2-Eb1-low macrophages",
"Mrc1+H2-Eb1-high macrophages",
"Dendritic cells",
"Monocytes",
"Neutrophils",
"Mast cells",
"Mki67+ macrophages",
"Fibroblasts",
"Schwann cells",
"T cells & ILCs",
"Developing B cells",
"Mature B cells"))
remove(mouse.merged)
# set colors
merged_colors <- c("darkred","firebrick1","darkorange","gold","green",
"darkolivegreen2","darkgreen","cyan","cornflowerblue","blue",
"darkorchid1","deeppink","plum1","burlywood3","azure3","azure4", "black")
# umap
umap <- DimPlot(object = mouse.annotated,
reduction = "umap",
repel = TRUE,
group.by = "merged_clusters",
cols = merged_colors) +
theme(legend.position = "bottom")
umap
embeddings <- mouse.annotated@reductions$umap@cell.embeddings
embeddings <- cbind(embeddings, as.character(mouse.annotated$merged_clusters))
colnames(embeddings)[4] <- "merged_clusters"
embeddings <- as.data.frame(embeddings)
three.dim <- plot_ly(embeddings,
x = ~UMAP_1,
y = ~UMAP_2,
z = ~UMAP_3,
color = ~merged_clusters,
colors = merged_colors,
size = 1)
three.dim <- three.dim %>% add_markers()
three.dim <- three.dim %>% layout(scene = list(xaxis = list(title = 'UMAP_1'),
yaxis = list(title = 'UMAP_2'),
zaxis = list(title = 'UMAP_3')))
three.dim
# Apoe isoform
umap1 <- DimPlot(object = mouse.annotated,
group.by = "merged_clusters",
reduction = "umap",
split.by = "isoform",
repel = TRUE,
cols = merged_colors)
umap1
# age
umap2 <- DimPlot(object = mouse.annotated,
group.by = "merged_clusters",
reduction = "umap",
split.by = "age",
repel = TRUE,
cols = merged_colors)
umap2
# sex
umap3 <- DimPlot(object = mouse.annotated,
group.by = "merged_clusters",
reduction = "umap",
split.by = "sex",
repel = TRUE,
cols = merged_colors)
umap3
# sample
umap4 <- DimPlot(object = mouse.annotated,
group.by = "merged_clusters",
reduction = "umap",
split.by = "sample",
ncol = 2,
repel = TRUE,
cols = merged_colors)
umap4
# phase
umap5 <- DimPlot(object = mouse.annotated,
group.by = "merged_clusters",
reduction = "umap",
split.by = "Phase",
cols = merged_colors,
repel = TRUE)
umap5
# mito.factor
umap6 <- DimPlot(object = mouse.annotated,
group.by = "merged_clusters",
reduction = "umap",
split.by = "mito.factor",
cols = merged_colors,
ncol = 2,
repel = TRUE)
umap6
# UMAP percent.mt
f1 <- FeaturePlot(mouse.annotated,
reduction = "umap",
features = "percent.mt") +
scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))
f1
# UMAP nCount
f2 <- FeaturePlot(mouse.annotated,
reduction = "umap",
features = "nCount_RNA") +
scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))
f2
# UMAP nFeature
f3 <- FeaturePlot(mouse.annotated,
reduction = "umap",
features = "nFeature_RNA") +
scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))
f3
# UMAP percent.ribo
f4 <- FeaturePlot(mouse.annotated,
reduction = "umap",
features = "percent.ribo.protein") +
scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))
f4
VlnPlot(mouse.annotated,
features = "Ttr",
split.by = "sample",
group.by = "sample",
cols = sample_colors)
VlnPlot(mouse.annotated,
features = "Ttr",
split.by = "merged_clusters",
cols = merged_colors)
FeaturePlot(object = mouse.annotated,
features = "Ttr")
# isoform
b1 <- mouse.annotated@meta.data %>%
group_by(merged_clusters, isoform) %>%
dplyr::count() %>%
group_by(merged_clusters) %>%
dplyr::mutate(percent = 100*n/sum(n)) %>%
ungroup() %>%
ggplot(aes(x=merged_clusters,y=percent, fill=isoform)) +
theme_classic() +
geom_col() +
scale_fill_manual(values = isoform_colors) +
ggtitle("Percentage of isoform per cluster") +
theme(axis.text.x = element_text(angle = 90))
b1
# sex
b2 <- mouse.annotated@meta.data %>%
group_by(merged_clusters, sex) %>%
dplyr::count() %>%
group_by(merged_clusters) %>%
dplyr::mutate(percent = 100*n/sum(n)) %>%
ungroup() %>%
ggplot(aes(x=merged_clusters,y=percent, fill=sex)) +
theme_classic() +
geom_col() +
scale_fill_manual(values = sex_colors) +
ggtitle("Percentage of sex per cluster") +
theme(axis.text.x = element_text(angle = 90))
b2
# age
b3 <- mouse.annotated@meta.data %>%
group_by(merged_clusters, age) %>%
dplyr::count() %>%
group_by(merged_clusters) %>%
dplyr::mutate(percent = 100*n/sum(n)) %>%
ungroup() %>%
ggplot(aes(x=merged_clusters,y=percent, fill=age)) +
theme_classic() +
geom_col() +
scale_fill_manual(values = age_colors) +
ggtitle("Percentage of age per cluster") +
theme(axis.text.x = element_text(angle = 90))
b3
# sample
b4 <- mouse.annotated@meta.data %>%
group_by(merged_clusters, sample) %>%
dplyr::count() %>%
group_by(merged_clusters) %>%
dplyr::mutate(percent = 100*n/sum(n)) %>%
ungroup() %>%
ggplot(aes(x=merged_clusters,y=percent, fill=sample)) +
theme_classic() +
geom_col() +
scale_fill_manual(values = sample_colors) +
ggtitle("Percentage of age per cluster") +
theme(axis.text.x = element_text(angle = 90))
# phase
b5 <- mouse.annotated@meta.data %>%
group_by(merged_clusters, Phase) %>%
dplyr::count() %>%
group_by(merged_clusters) %>%
dplyr::mutate(percent = 100*n/sum(n)) %>%
ungroup() %>%
ggplot(aes(x=merged_clusters,y=percent, fill=Phase)) +
theme_classic() +
geom_col() +
ggtitle("Percentage of age per cluster") +
theme(axis.text.x = element_text(angle = 90))
b5
# mito.factor
b6 <- mouse.annotated@meta.data %>%
group_by(merged_clusters, mito.factor) %>%
dplyr::count() %>%
group_by(merged_clusters) %>%
dplyr::mutate(percent = 100*n/sum(n)) %>%
ungroup() %>%
ggplot(aes(x=merged_clusters,y=percent, fill=mito.factor)) +
theme_classic() +
geom_col() +
ggtitle("Percentage of age per cluster") +
theme(axis.text.x = element_text(angle = 90))
b6
Idents(mouse.annotated) <- mouse.annotated$individual_clusters
mouse.annotated <- BuildClusterTree(object = mouse.annotated,
dims = 1:15,
reorder = FALSE,
reorder.numeric = FALSE)
tree <- mouse.annotated@tools$BuildClusterTree
tree$tip.label <- paste0(tree$tip.label)
p <- ggtree::ggtree(tree, aes(x, y)) +
scale_y_reverse() +
ggtree::geom_tree() +
ggtree::theme_tree() +
ggtree::geom_tiplab(offset = 1) +
ggtree::geom_tippoint(color = cluster_colors[1:length(tree$tip.label)], shape = 16, size = 5) +
coord_cartesian(clip = 'off') +
theme(plot.margin = unit(c(0,2.5,0,0), 'cm'))
p
Idents(mouse.annotated) <- mouse.annotated$merged_clusters
mouse.annotated <- BuildClusterTree(object = mouse.annotated,
dims = 1:21,
reorder = FALSE,
reorder.numeric = FALSE)
tree <- mouse.annotated@tools$BuildClusterTree
tree$tip.label <- paste0(tree$tip.label)
p <- ggtree::ggtree(tree, aes(x, y)) +
scale_y_reverse() +
ggtree::geom_tree() +
ggtree::theme_tree() +
ggtree::geom_tiplab(offset = 1) +
ggtree::geom_tippoint(color = merged_colors[1:length(tree$tip.label)], shape = 16, size = 5) +
coord_cartesian(clip = 'off') +
theme(plot.margin = unit(c(0,2.5,0,0), 'cm'))
p
goi1 <- c("Cd3e","Trbc1","Cd4","Cd8a","Foxp3","Tbx21","Gata3","Thy1",
"Cd19","Ms4a1","Cd27","Ptprc","Ly6g","Itgam","Aif1","Klrb1")
goi2 <- c("Adgre1","Ms4a3","Ly6c2","Mrc1","Lyz2","Cd74","Cd83","Cd14","H2-Aa",
"H2-Ab1","Sirpa","Xcr1","Siglech","Itgax","Kit","Mcpt4")
goi3 <- c("Pdgfra","Col1a1","Lum","Pdgfrb","Rgs5","Cspg4","Acta2","Tagln",
"Pecam1","Cd34","Plvap","Stmn2","Slc38a5","Vwf","Mfsd2a","Cldn5")
goi4 <- c("Prox1","Flt4","Pdpn","Lyve1","Sox10","Mbp","Fgf13","Kcnab2","Tubb3",
"Slc17a6","Shank2","Erbb4","Park7","Kif5b","Slc4a1","Hmbs")
goi5 <- c("Pecam1","Flt4","Itgam","Mrc1","Cd3e","Gata3","Cd19","Ly6g","Pdgfrb",
"Kit","Col1a1","Pmp22","Hmbs")
goi6 <- c("Pdgfrb","Rgs5","Pecam1","Vwf","Cldn5","Kdr","Prox1","Flt4","Ptprc",
"Itgam","Aif1","Mrc1","H2-Eb1","Itgax","Ccr2","Ly6c2","Ly6g","Kit",
"Mki67","Col1a2","Plp1","Trbc2","Gata3","Il7r","Cd19","Ms4a1")
goi7 <- c("Kdr","Flt4","Prox1","Lyve1","Nrp2","Itga9","Itgb1","Grb2","Tnc","Plcg1","Crk")
goi8 <- c("Vegfc","Vegfd","Nrp2","Itga9","Itgb1","Grb2","Vcam1","Fn1","Crk","Sema3c","Sema3f")
Idents(mouse.annotated) <- "merged_clusters"
v1 <- VlnPlot(mouse.annotated,
features = goi1,
split.by = 'merged_clusters',
cols = merged_colors,
flip = TRUE,
stack = TRUE) +
theme(legend.position = "none", axis.text.x = element_text(angle = 90))
v1
v2 <- VlnPlot(mouse.annotated,
features = goi2,
split.by = 'merged_clusters',
cols = merged_colors,
flip = TRUE,
stack = TRUE) +
theme(legend.position = "none", axis.text.x = element_text(angle = 90))
v2
v3 <- VlnPlot(mouse.annotated,
features = goi3,
split.by = 'merged_clusters',
cols = merged_colors,
flip = TRUE,
stack = TRUE) +
theme(legend.position = "none", axis.text.x = element_text(angle = 90))
v3
v4 <- VlnPlot(mouse.annotated,
features = goi4,
split.by = 'merged_clusters',
cols = merged_colors,
flip = TRUE,
stack = TRUE) +
theme(legend.position = "none", axis.text.x = element_text(angle = 90))
v4
v5 <- VlnPlot(mouse.annotated,
features = goi5,
split.by = 'merged_clusters',
cols = merged_colors,
flip = TRUE,
stack = TRUE) +
theme(legend.position = "none", axis.text.x = element_text(angle = 90))
v5
v6 <- VlnPlot(mouse.annotated,
features = goi6,
split.by = 'merged_clusters',
cols = merged_colors,
flip = TRUE,
stack = TRUE) +
theme(legend.position = "none", axis.text.x = element_text(angle = 90))
v6
v7 <- VlnPlot(mouse.annotated,
features = goi7,
cols = isoform_colors,
split.by = "isoform",
group.by = "merged_clusters",
flip = TRUE,
stack = TRUE)
v7
v8 <- VlnPlot(mouse.annotated,
features = goi7,
cols = sex_colors,
split.by = "sex",
group.by = "merged_clusters",
flip = TRUE,
stack = TRUE)
v8
v9 <- VlnPlot(mouse.annotated,
features = goi7,
cols = age_colors,
split.by = "age",
group.by = "merged_clusters",
flip = TRUE,
stack = TRUE)
v9
v10 <- VlnPlot(mouse.annotated,
features = goi8,
cols = isoform_colors,
split.by = "isoform",
group.by = "merged_clusters",
flip = TRUE,
stack = TRUE)
v10
v11 <- VlnPlot(mouse.annotated,
features = goi8,
cols = sex_colors,
split.by = "sex",
group.by = "merged_clusters",
flip = TRUE,
stack = TRUE)
v11
v12 <- VlnPlot(mouse.annotated,
features = goi8,
cols = age_colors,
split.by = "age",
group.by = "merged_clusters",
flip = TRUE,
stack = TRUE)
v12
# intitialize variables
cell_types <- unique(mouse.annotated$merged_clusters)
isoform.df <- data.frame()
# loop through clusters
for (i in cell_types) {
print(i)
cluster <- subset(mouse.annotated, merged_clusters == i)
Idents(cluster) <- cluster$isoform
markers <- FindMarkers(object = cluster,
ident.1 = "E4",
ident.2 = "E3",
only.pos = FALSE, # default
min.pct = 0.10, # default
test.use = "MAST",
verbose = TRUE,
assay = "RNA")
if(nrow(markers) == 0) {
next
}
markers$cluster <- i
markers$gene <- rownames(markers)
isoform.df <- rbind(isoform.df, markers)
}
# reformat table
colnames(isoform.df)[c(3,4)] <- c("percent_E4","percent_E3")
rownames(isoform.df) <- 1:nrow(isoform.df)
isoform.df$percent_difference <- abs(isoform.df$percent_E4 - isoform.df$percent_E3)
isoform.df <- isoform.df[,c(6,7,1,5,2,3,4,8)]
# write table
write.table(isoform.df, "../../results/allClusters/DEGs/E4_vs_E3_DEGs.tsv",
sep = "\t", quote = FALSE, row.names = FALSE)
cell_types <- unique(mouse.annotated$merged_clusters)
sex.df <- data.frame()
for (i in cell_types) {
print(i)
cluster <- subset(mouse.annotated, merged_clusters == i)
Idents(cluster) <- cluster$sex
markers <- FindMarkers(object = cluster,
ident.1 = "Female",
ident.2 = "Male",
only.pos = FALSE, # default
min.pct = 0.10, # default
test.use = "MAST",
verbose = TRUE,
assay = "RNA")
if(nrow(markers) == 0) {
next
}
markers$cluster <- i
markers$gene <- rownames(markers)
sex.df <- rbind(sex.df, markers)
}
# reformat table
colnames(sex.df)[c(3,4)] <- c("percent_female","percent_male")
rownames(sex.df) <- 1:nrow(sex.df)
sex.df$percent_difference <- abs(sex.df$percent_female - sex.df$percent_male)
sex.df <- sex.df[,c(6,7,1,5,2,3,4,8)]
# write table
write.table(sex.df,
"../../results/allClusters/DEGs/female_vs_male_DEGs.tsv", sep = "\t",
quote = FALSE, row.names = FALSE)
# initialize variables
cell_types <- unique(mouse.annotated$merged_clusters)
age.df <- data.frame()
# loop through clusters
for (i in cell_types) {
print(i)
cluster <- subset(mouse.annotated, merged_clusters == i)
Idents(cluster) <- cluster$age
markers <- FindMarkers(object = cluster,
ident.1 = "14 months",
ident.2 = "2 months",
only.pos = FALSE, # default
min.pct = 0.10, # default
test.use = "MAST",
verbose = TRUE,
assay = "RNA")
if(nrow(markers) == 0) {
next
}
markers$cluster <- i
markers$gene <- rownames(markers)
age.df <- rbind(age.df, markers)
}
# reformat table
colnames(age.df)[c(3,4)] <- c("percent_14mo","percent_2mo")
rownames(age.df) <- 1:nrow(age.df)
age.df$percent_difference <- abs(age.df$percent_14mo - age.df$percent_2mo)
age.df <- age.df[,c(6,7,1,5,2,3,4,8)]
# write table
write.table(age.df,
"../../results/allClusters/DEGs/14_vs_2_months_DEGs.tsv", sep = "\t",
quote = FALSE, row.names = FALSE)
# read tables
isoform.df <- read.table(
"../../results/allClusters/DEGs/E4_vs_E3_DEGs.tsv",
sep = "\t", header = TRUE)
age.df <- read.table(
"../../results/allClusters/DEGs/14_vs_2_months_DEGs.tsv",
sep = "\t", header = TRUE)
sex.df <- read.table(
"../../results/allClusters/DEGs/female_vs_male_DEGs.tsv",
sep = "\t", header = TRUE)
# filter
isoform.df <- isoform.df[isoform.df$p_val_adj < 0.05,]
age.df <- age.df[age.df$p_val_adj < 0.05,]
sex.df <- sex.df[sex.df$p_val_adj < 0.05,]
# add columns
direction <- isoform.df$avg_log2FC > 0
direction <- gsub(TRUE, "isoform_up", direction)
direction <- gsub(FALSE, "isoform_down", direction)
isoform.df$direction <- direction
direction <- age.df$avg_log2FC > 0
direction <- gsub(TRUE, "age_up", direction)
direction <- gsub(FALSE, "age_down", direction)
age.df$direction <- direction
direction <- sex.df$avg_log2FC > 0
direction <- gsub(TRUE, "sex_up", direction)
direction <- gsub(FALSE, "sex_down", direction)
sex.df$direction <- direction
# reformat tables
isoform.df2 <- isoform.df %>%
dplyr::count(cluster,direction) %>%
tidyr::spread(cluster, n)
age.df2 <- age.df %>%
dplyr::count(cluster,direction) %>%
tidyr::spread(cluster, n)
sex.df2 <- sex.df %>%
dplyr::count(cluster,direction) %>%
tidyr::spread(cluster, n)
# master table
df <- smartbind(isoform.df2, sex.df2, age.df2)
df
# read tables
isoform.df <- read.table("../../results/allClusters/DEGs/E4_vs_E3_DEGs.tsv",
sep = "\t", header = TRUE)
age.df <- read.table("../../results/allClusters/DEGs/14_vs_2_months_DEGs.tsv",
sep = "\t", header = TRUE)
sex.df <- read.table("../../results/allClusters/DEGs/female_vs_male_DEGs.tsv",
sep = "\t", header = TRUE)
# filter tables
isoform.df <- isoform.df[isoform.df$p_val_adj < 0.05,]
age.df <- age.df[age.df$p_val_adj < 0.05,]
sex.df <- sex.df[sex.df$p_val_adj < 0.05,]
clusters <- unique(mouse.annotated$merged_clusters)
for (i in clusters) {
# Subset df by cluster
isoform <- subset(isoform.df, isoform.df$cluster == i)
sex <- subset(sex.df, sex.df$cluster == i)
age <- subset(age.df, age.df$cluster == i)
# Subset lists
isoform_up <- subset(isoform$gene, isoform$avg_log2FC > 0)
isoform_down <- subset(isoform$gene, isoform$avg_log2FC < 0)
sex_up <- subset(sex$gene, sex$avg_log2FC > 0)
sex_down <- subset(sex$gene, sex$avg_log2FC < 0)
age_up <- subset(age$gene, age$avg_log2FC > 0)
age_down <- subset(age$gene, age$avg_log2FC < 0)
list_input <- list("Age Up-regulated" = age_up,
"Isoform Up-regulated" = isoform_up,
"Sex Up-regulated" = sex_up,
"Age Down-regulated" = age_down,
"Isoform Down-regulated" = isoform_down,
"Sex Down-regulated" = sex_down)
data <- fromList(list_input)
# store names
names <- c("Sex Down-regulated","Isoform Down-regulated","Age Down-regulated",
"Sex Up-regulated","Isoform Up-regulated","Age Up-regulated")
# plot
upset_gene <- ComplexUpset::upset(data,
names,
set_sizes=(
upset_set_size()
+ geom_text(aes(label=..count..), hjust=1.1, stat='count')
+ expand_limits(y=500)),
queries = list(upset_query("Age Up-regulated", fill = "red"),
upset_query("Isoform Up-regulated", fill = "red"),
upset_query("Sex Up-regulated", fill = "red"),
upset_query("Age Down-regulated", fill = "blue"),
upset_query("Isoform Down-regulated", fill = "blue"),
upset_query("Sex Down-regulated", fill = "blue")),
base_annotations = list('Intersection size' = (
intersection_size(bar_number_threshold=1, width=0.5)
+ scale_y_continuous(expand=expansion(mult=c(0, 0.05)),limits = c(0,400)) # space on top
+ theme(
# hide grid lines
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
# show axis lines
axis.line=element_line(colour='black')))),
stripes = upset_stripes(
geom=geom_segment(size=12), # make the stripes larger
colors=c('grey95', 'white')),
# to prevent connectors from getting the colorured
# use `fill` instead of `color`, together with `shape='circle filled'`
matrix = intersection_matrix(
geom=geom_point(
shape='circle filled',
size=3,
stroke=0.45)),
sort_sets=FALSE,
sort_intersections='descending'
)
upset_gene <- upset_gene + ggtitle(paste0(i,", adj_p_val < 0.05"))
i <- gsub(" ","_",i)
i <- gsub("/","_",i)
i <- gsub("-","_",i)
pdf(paste0("../../results/allClusters/upset/upset_",tolower(i),".pdf"), height = 6, width = 8)
print(upset_gene)
dev.off()
}
variables <- c("sex","age","isoform")
all_clusters <- unique(mouse.annotated$merged_clusters)
for (i in variables) {
# read DEG file
if (i == "sex") {
treatment_vs_control <-
read.delim("../../results/allClusters/DEGs/female_vs_male_DEGs.tsv",
sep = "\t")
} else if (i == "age") {
treatment_vs_control <-
read.delim("../../results/allClusters/DEGs/14_vs_2_months_DEGs.tsv",
sep = "\t")
} else {
treatment_vs_control <-
read.delim("../../results/allClusters/DEGs/E4_vs_E3_DEGs.tsv",
sep = "\t")
}
# assign colors
color_values <- vector()
max <- nrow(treatment_vs_control)
for(row in 1:max){
if (treatment_vs_control$p_val_adj[row] < 0.05){
if (treatment_vs_control$avg_log2FC [row] > 0){
color_values <- c(color_values, 1) # 1 when logFC > 0 and FDRq < 0.05
}
else if (treatment_vs_control$avg_log2FC[row] < 0){
color_values <- c(color_values, 2) # 2 when logFC < 0 and FDRq < 0.05
}
}
else{
color_values <- c(color_values, 3) # 3 when FDRq >= 0.05
}
}
treatment_vs_control$color_adjpval_0.05 <- factor(color_values)
# loop through clusters
for (j in all_clusters) {
# subset cluster
data <- subset(treatment_vs_control, cluster == j)
# plot only if there are DEGs with p_val_adj < 0.05
num <- subset(data, p_val_adj < 0.05)
num <- nrow(num)
if(num != 0) {
# subset genes to label
up <- data[data$color_adjpval_0.05 == 1,]
up10 <- up[1:10,]
down <- data[data$color_adjpval_0.05 == 2,]
down10 <- down[1:10,]
# set manual colors
if (!1 %in% unique(data$color_adjpval_0.05)) {
my_colors <- c("blue","gray")
} else if (!2 %in% unique(data$color_adjpval_0.05)) {
my_colors <- c("red","gray")
} else if (!1 %in% unique(data$color_adjpval_0.05) && !2 %in% unique(data$color_adjpval_0.05)) {
my_colors <- c("gray")
} else {
my_colors <- c("red","blue","gray")
}
# set significance threshold
hadjpval <- (-log10(max(
data$p_val[data$p_val_adj < 0.05],
na.rm=TRUE)))
# plot
p <-
ggplot(data = data,
aes(x = avg_log2FC, # x-axis is logFC
y = -log10(p_val), # y-axis will be -log10 of P.Value
color = color_adjpval_0.05)) + # color is based on factored color column
geom_point(alpha = 0.8, size = 2) + # create scatterplot, alpha makes points transparent
theme_bw() + # set color theme
theme(legend.position = "none") + # no legend
scale_color_manual(values = my_colors) + # set factor colors
labs(
title = "", # no main title
x = expression(log[2](FC)), # x-axis title
y = expression(-log[10] ~ "(" ~ italic("p") ~ "-value)") # y-axis title
) +
theme(axis.title.x = element_text(size = 10),
axis.text.x = element_text(size = 10)) +
theme(axis.title.y = element_text(size = 10),
axis.text.y = element_text(size = 10)) +
geom_hline(yintercept = hadjpval, # horizontal line
colour = "#000000",
linetype = "dashed") +
ggtitle(paste0(j,"\n",i,", p_val_adj < 0.05")) +
geom_text_repel(data = up10,
aes(x = avg_log2FC, y= -log10(p_val), label = gene),
color = "maroon",
fontface="italic",
max.overlaps = getOption("ggrepel.max.overlaps", default = 30)
) +
geom_text_repel(data = down10,
aes(x = avg_log2FC, y= -log10(p_val), label = gene),
color = "navyblue",
fontface="italic",
max.overlaps = getOption("ggrepel.max.overlaps", default = 30)
)
p
# save
clus <- j
clus <- gsub(" ","_",clus)
clus <- gsub("/","_",clus)
clus <- gsub("-","_",clus)
if (i == "sex") {
path <- paste0("../../results/allClusters/volcano/sex/",
tolower(clus),"_sex_volcano")
} else if (i == "age") {
path <- paste0("../../results/allClusters/volcano/age/",
tolower(clus),"_age_volcano")
} else {
path <- paste0("../../results/allClusters/volcano/isoform/",
tolower(clus),"_isoform_volcano")
}
pdf(paste(path,".pdf"), height = 8, width = 8)
print(p)
dev.off()
print(paste("i =",i,", j =",j))
}
} # end loop through clusters
} # end loop through variables
mouse.annotated@assays$RNA@var.features <-
mouse.annotated@assays$SCT@var.features
metadata <- mouse.annotated@meta.data
metadata <- metadata[,c(25,26,2:23)]
mouse.annotated@meta.data <- metadata
mouse.annotated@assays$SCT@meta.features <- metadata
mouse.annotated@assays$RNA@meta.features <- metadata
# make shiny folder
DefaultAssay(mouse.annotated) <- "RNA"
Idents(mouse.annotated) <- mouse.annotated$merged_clusters
sc.config <- createConfig(mouse.annotated)
makeShinyApp(mouse.annotated, sc.config, gene.mapping = TRUE,
shiny.title = "Mouse Single Cell")